**********************************************************************************************
* Do-file used to impute owner-occupied housing assets based on wealth surveys               *
**********************************************************************************************

* Imputation based on age groups crossed with groups of total income

* I) Organize the data
***************************

	* Load the pooled survey file (folder given by the global macro $wfenl)
		use "$wfenl/enl_1970_2010", clear

	* Missing values or outliers
	* For each survey year, values of patfon_owner that are >=0 but below 10% of the
	* weighted mean of the strictly positive values are reset to 0. The ratio
	* (positive values below the cut-off) / (all positive values) is stored in the scalar
	* nb_supp_<year> and printed again once every year has been processed.
	* NOTE(review): the weight used here is "pond" while the rest of the file uses "pondn" —
	* confirm whether this is a distinct variable or an abbreviation.
		local survey_years 1970 1973 1978 1984 1988 1992 1996 2002 2006 2010
		* scratch scalars get temporary names so they cannot collide with variable names
		tempname cutoff n_below n_pos
		foreach yr of local survey_years {
			* weighted mean of positive amounts, then the 10% cut-off
			summarize patfon_owner [aweight=pond] if year==`yr' & patfon_owner>0
			display r(mean)
			display 0.1*r(mean)
			scalar `cutoff' = 0.1*r(mean)
			* observations already equal to zero (nb0 is left defined after the loop)
			count if patfon_owner==0 & year==`yr'
			scalar nb0 = r(N)
			display "nb zeros " nb0
			* strictly positive observations below the cut-off
			count if patfon_owner<`cutoff' & year==`yr' & patfon_owner>0
			scalar `n_below' = r(N)
			display "nb suppressions " `n_below'
			* all strictly positive, non-missing observations
			count if patfon_owner>0 & !missing(patfon_owner) & year==`yr'
			scalar `n_pos' = r(N)
			display "nb positive values " `n_pos'
			scalar nb_supp_`yr' = `n_below'/`n_pos'
			display "% supp " nb_supp_`yr'
			* reset the small values (missing values are not below the cut-off, so they are untouched)
			replace patfon_owner = 0 if year==`yr' & patfon_owner<`cutoff' & patfon_owner>=0
			scalar drop `n_below' `n_pos' `cutoff'
		}

		* Recap of the suppression ratios
		foreach yr of local survey_years {
			display "% suppression for year " `yr' ": " nb_supp_`yr'
		}


	* Variables used for imputations

		* Age groups (q_age_pr), coded 1 to 9
			* Cut points as coded: age<=24, 24<age<=30, 30<age<=39, 39<age<=49, 49<age<=54,
			* 54<age<=60, 60<age<=65, 65<age<=70, age>70.
			* The class index is 1 plus the number of cut points strictly exceeded.
			* NOTE(review): a missing age compares greater than any number in Stata, so it
			* ends up in class 9 — confirm this is intended.
			capture drop q_age_pr
			gen q_age_pr = 1 + (age>24) + (age>30) + (age>39) + (age>49) + (age>54) + (age>60) + (age>65) + (age>70)
			*tab q_age_pr
			* Classes merged for some survey years because of missing values on "patfon_owner"
				* 1970: classes 8 and 9 folded into 7
				replace q_age_pr = 7 if inrange(q_age_pr, 8, 9) & year==1970
				* 1973 and 2002: class 9 folded into 8
				replace q_age_pr = 8 if q_age_pr==9 & inlist(year, 1973, 2002)
				* 2006 and 2010: class 1 folded into 2
				replace q_age_pr = 2 if q_age_pr==1 & (year==2006 | year==2010)
				
				
		* Groups of total income within each age group (q_age_rev)
			* Within every year x age group, observations are ranked on rev_tot (weighted) and
			* placed in 7 brackets coded by their lower bound:
			* 0 = P0-25, 25 = P25-50, 50 = P50-75, 75 = P75-90, 90 = P90-95, 95 = P95-99, 99 = P99-100
			tempvar tiebreak w_int pct
			sort year q_age_pr rev_tot
			* Zero incomes are replaced by a uniform draw on (0,1) so that they do not all tie.
			* NOTE(review): no seed is set in the visible code, so the draws (and the bracket of
			* zero-income observations) may differ from one run to the next — confirm a seed is
			* set by the calling do-file.
			gen `tiebreak' = runiform()
			replace rev_tot = `tiebreak' if rev_tot==0
				
			* weights scaled by 100 and rounded to integers before computing the cumulative distribution
			gen `w_int' = round(100*pondn)
			bysort year q_age_pr : cumul rev_tot [w=`w_int'], gen(`pct')
			replace `pct' = `pct'*100
			capture drop q_age_rev
			* NOTE(review): a missing percentile fails every "<" test below and therefore falls
			* in bracket 99 — confirm no observation has a missing percentile.
			gen q_age_rev = cond(`pct'<25, 0, cond(`pct'<50, 25, cond(`pct'<75, 50, cond(`pct'<90, 75, cond(`pct'<95, 90, cond(`pct'<99, 95, 99))))))
			drop `pct' `w_int' `tiebreak'
			*tab q_age_rev [aw=pondn]
			* Adjustments
				*summ homeow [aw=pondn] if q_age_pr==1 
				* Age classes 1 and 2: P99-100 is merged into P95-99 (original author's note: only
				* about 5% of homeowners in these classes), giving a single P95-100 bracket
				replace q_age_rev = 95 if q_age_rev==99 & q_age_pr<3
				* Same merge for age classes above 7 in the 2002 survey
				replace q_age_rev = 95 if q_age_rev==99 & q_age_pr>7 & year==2002

* II) Statistics for the imputation
*************************************	
	
	* Weighted homeowner indicator: the weight pondn for homeowner==1, 0 otherwise
		gen prop_immo = cond(homeowner==1, pondn, 0)
	
	* Weighted amounts
		replace patfon_owner = pondn*patfon_owner
		* cell = year x age group x income group
		local cell year q_age_pr q_age_rev
		* Weighted population of the cell
			bysort `cell' : egen tot = total(pondn)
		* Weighted number of observations with a strictly positive, non-missing amount
		* (original author's note: homeowners who purchased less than 4 years before the survey)
			bysort `cell' : egen tot2 = total(pondn*(!missing(patfon_owner) & patfon_owner>0))
		* Weighted number of observations with a non-missing amount (original note: homeowners)
			bysort `cell' : egen tot3 = total(pondn*(!missing(patfon_owner)))
		* Unweighted count of observations with a strictly positive, non-missing amount
			bysort `cell' : egen obs = total(!missing(patfon_owner) & patfon_owner>0)
		
		* One row per cell: sums of weighted amounts and of the weighted homeowner indicator;
		* the four counters are constant within a cell, so their mean returns that constant
		collapse (sum) patfon_owner prop_immo (mean) obs tot tot2 tot3, by(`cell')
			
		* Proportion of owner-occupied housing by groups
			replace prop_immo = prop_immo/tot
		* Average amount among observations with a positive reported amount
			gen patfon_owner_mean = patfon_owner/tot2
		* This average is applied to every observation counted in tot3
			replace patfon_owner = patfon_owner_mean*tot3
		* Share (in %) of the year's total owner-occupied housing assets accruing to each cell
			bysort year : egen patfon_owner_tot = total(patfon_owner)
			gen share_patimmo = 100*patfon_owner/patfon_owner_tot
		
		sort `cell'
		
	/* Visual inspection
		replace patfon_owner=patfon_owner/1000
		separate patfon_owner, by(q_age_pr)
		cd "$wfenl/temp_graph"
		foreach num of numlist 1970 1973 1978 1984 1988 1992 1996 2002 2006 2010{
			graph twoway (line patfon_owner1-patfon_owner3 q_age_rev, sort) if year==`num' , xtitle(name) saving (group13)
			twoway (line patfon_owner4-patfon_owner7 q_age_rev, sort) if year==`num'  , saving (group47)
			twoway (line patfon_owner8-patfon_owner9 q_age_rev, sort) if year==`num', saving (group89)

			gr combine group13.gph group47.gph group89.gph, title("Year `num'") 
			graph export combine_`num'.png, replace
			erase group13.gph
			erase group47.gph 
			erase group89.gph
		}
		*/
		* Only the cell identifiers and the two imputation keys are kept
		keep year q* share* prop* 
		
		* Snapshot of the survey-year keys, reloaded further down
		tempfile base
		save "`base'", replace
		
	
* III) Harmonization and correction
*************************************
	* Interpolation for missing years
	* (the survey-year keys were saved in the tempfile `base'; memory is rebuilt from scratch,
	*  so no sort of the data currently in memory is needed before clearing it)
	
		* Add missing years
			* Skeleton for ONE non-survey year: 9 age groups x 7 income groups, keys left missing
			clear 
			set obs 9
			gen q_age_pr = _n
			expand 7
			* income-group codes are the lower percentile bound of each bracket
			bysort q_age_pr : gen q_age_rev = real(word("0 25 50 75 90 95 99", _n))
			* age groups 1-2 have no separate P99-100 bracket (merged into 95 upstream)
			drop if q_age_pr<3 & q_age_rev==99
			foreach var of newlist year prop_immo share_patimmo  {
				gen `var' = .
			}
			tempfile temp
			save "`temp'", replace
			* Survey years (1984 is discarded and interpolated like a non-survey year)
			use "`base'", clear
			drop if year==1984
			* Append one copy of the skeleton per non-survey year and stamp it with that year
			foreach num of numlist 1971 1972 1974/1977 1979/1987  1989/1991 1993/1995 1997/2001 2003/2005 2007/2009  {
				append using "`temp'"
				replace year = `num' if year==.
			}
			sort year q_age_pr q_age_rev
		
		* References years: 1970 1973 1978  1988 1992 1996 2002 2006 2010
		* Each reference year's keys are attached to every row of the same cell as extra
		* columns prop_immo<year> / share_patimmo<year>.
			erase "`temp'"
			erase "`base'"
			* allyears: long panel, read once per reference year; newbase: accumulates the columns
			tempfile allyears newbase
			save "`allyears'", replace
			save "`newbase'", replace
			foreach num of numlist 1970 1973 1978 /*1984*/ 1988 1992 1996 2002 2006 2010{
				use "`allyears'", clear
				keep if year==`num'
				foreach var of varlist  prop_immo share_patimmo {
					rename `var' `var'`num'
				}
				drop year
				* The master holds one row per (q_age_pr, q_age_rev) for the reference year, so this
				* is a 1:m merge. Using 1:m instead of m:m gives the same result here and makes
				* Stata stop with an error if the master is ever not unique on the key.
				merge 1:m q_age_pr q_age_rev using "`newbase'", nogenerate
				save "`newbase'", replace
			}
		erase "`allyears'"
		sort year q_age_pr q_age_rev
		
		* Cells missing in both reference years surrounding the sub-period: dropped
			drop if year<1973 & q_age_pr>8
			drop if q_age_pr==1 & year>2006 & year<2010
		
	* Interpolation
	* The keys of every year strictly between two consecutive reference years are obtained
	* by linear interpolation between the two reference-year columns. 1984 is not a reference
	* year here: 1979-1987 are interpolated between 1978 and 1988.
	* Before interpolating, cells that exist in only one of the two reference years get a
	* value for the other one. These adjustments use row offsets (_n-7, _n+7, _n-1, _n+6, ...)
	* and therefore rely on the data being sorted by year q_age_pr q_age_rev, with 7 income
	* rows per age group (6 for age groups 1 and 2).
		
			local ref_years 1970 1973 1978 1988 1992 1996 2002 2006 2010
			local n_periods = wordcount("`ref_years'") - 1
			forvalues k = 1/`n_periods' {
				local y0 : word `k' of `ref_years'
				local y1 : word `=`k'+1' of `ref_years'
				* rows strictly inside the sub-period
				local mid "year>`y0' & year<`y1'"
		
				* Periods 1970-1973 and 1973-1978
				* The top age group g of the sub-period (8, resp. 9) is missing in the first
				* reference year because it was folded into group g-1 in that survey.
				if inlist(`y0', 1970, 1973) {
					local g = cond(`y0'==1970, 8, 9)
					local g_prev = `g' - 1
					* proportion: group g borrows the value of group g-1
					replace prop_immo`y0' = prop_immo`y0'[_n-7] if q_age_pr==`g' & `mid'
					* share: the share of group g-1 is split between g-1 and g in proportion to
					* their shares in the second reference year (g is filled first, from the
					* still-unsplit value of g-1)
					replace share_patimmo`y0' = share_patimmo`y0'[_n-7]*share_patimmo`y1'/(share_patimmo`y1'+share_patimmo`y1'[_n-7]) if q_age_pr==`g' & `mid'
					replace share_patimmo`y0' = share_patimmo`y0'*share_patimmo`y1'/(share_patimmo`y1'+share_patimmo`y1'[_n+7]) if q_age_pr==`g_prev' & `mid'
				}
		
				* Period 1996-2002 (adjustments also applied to the rows of 2003-2005)
				* Year 2002 : age group 9 is missing, and so is income group 99 of age group 8.
				* The 2006 shares serve as splitting weights.
				else if `y0'==1996 {
					local near2002 "year>1996 & year<2006 & year!=2002"
					* proportions: borrow from income group 95, then from age group 8
					replace prop_immo2002 = prop_immo2002[_n-1] if q_age_pr==8 & q_age_rev==99 & `near2002'
					replace prop_immo2002 = prop_immo2002[_n-7] if q_age_pr==9 & `near2002'
					* shares, missing income group: split 95 between 95 and 99 within age group 8
					replace share_patimmo2002 = share_patimmo2002[_n-1]*share_patimmo2006/(share_patimmo2006+share_patimmo2006[_n-1]) if q_age_pr==8 & q_age_rev==99 & `near2002'
					replace share_patimmo2002 = share_patimmo2002*share_patimmo2006/(share_patimmo2006+share_patimmo2006[_n+1]) if q_age_pr==8 & q_age_rev==95 & `near2002'
					* shares, missing age group: split age group 8 between 8 and 9
					replace share_patimmo2002 = share_patimmo2002[_n-7]*share_patimmo2006/(share_patimmo2006+share_patimmo2006[_n-7]) if q_age_pr==9 & `near2002'
					replace share_patimmo2002 = share_patimmo2002*share_patimmo2006/(share_patimmo2006+share_patimmo2006[_n+7]) if q_age_pr==8 & `near2002'
				}
		
				* Period 2002-2006
				* Year 2006 : age group 1 is missing (folded into group 2 in that survey).
				else if `y0'==2002 {
					* proportion: the 2002 value is carried over unchanged
					* NOTE(review): the other adjustments borrow from the neighbouring age group
					* instead — confirm the difference is deliberate.
					replace prop_immo2006 = prop_immo2002 if q_age_pr==1 & `mid'
					* share: split group 2 between 1 and 2 using the 2002 shares as weights
					replace share_patimmo2006 = share_patimmo2006[_n+6]*share_patimmo2002/(share_patimmo2002+share_patimmo2002[_n+6]) if q_age_pr==1 & `mid'
					replace share_patimmo2006 = share_patimmo2006*share_patimmo2002/(share_patimmo2002+share_patimmo2002[_n-6]) if q_age_pr==2 & `mid'
				}
		
				* Linear interpolation between the two reference years
				foreach key in prop_immo share_patimmo {
					replace `key' = `key'`y0' + (`key'`y1' - `key'`y0')*(year-`y0')/(`y1'-`y0') if `mid'
				}
				* the column of the earlier reference year is no longer needed
				drop prop_immo`y0' share_patimmo`y0'
			}
			* columns of the last reference year
			drop prop_immo2010 share_patimmo2010
	
	* Add the ratio debt / gross housing assets (ratio_pass_owner) from the wealth-survey key file
		* rows that exist only in the key file are not kept
		merge 1:1 year q_age_pr q_age_rev using "$param/Imputation/clef_immo_EPrevtot.dta", keepusing(ratio_pass_owner) keep(master match) nogenerate
		* sorted so that, within a year and income group, age group 2 directly follows age group 1
		sort year q_age_rev q_age_pr
		* age group 1 takes the ratio of the age group 2 row that follows it
		replace ratio_pass_owner = ratio_pass_owner[_n+1] if q_age_pr==1 & q_age_pr[_n+1]==2 
		* NOTE(review): this intermediate file name differs from the final one only by letter
		* case (enl vs ENL); on a case-insensitive file system both names are the same file.
		save "$param/Imputation/clef_immo_enl.dta", replace

		* For year before 1992: the 1992 ratio of the same cell is used
			use "$param/Imputation/clef_immo_EPrevtot.dta", clear
			keep if year==1992
			drop year prop_immo share_patimmo
			sort q_age_rev q_age_pr
			replace ratio_pass_owner = ratio_pass_owner[_n+1] if q_age_pr==1 & q_age_pr[_n+1]==2 
			rename ratio_pass_owner ratio_pass_owner2
			* cells present only in the 1992 key file are not kept
			merge 1:m q_age_pr q_age_rev using "$param/Imputation/clef_immo_enl.dta", keep(match using) nogenerate
			replace ratio_pass_owner = ratio_pass_owner2 if year<1992
			drop ratio_pass_owner2
	order year q_age_pr q_age_rev 
	sort year q_age_pr q_age_rev 
save "$param/Imputation/clef_immo_ENL.dta", replace
			


